/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package edu.isistan.uima.unified.ecore;

import java.io.IOException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.uima.UIMAFramework;
import org.apache.uima.UIMARuntimeException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.impl.XmiCasSerializer;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.AllowedValue;
import org.apache.uima.resource.metadata.FeatureDescription;
import org.apache.uima.resource.metadata.TypeDescription;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.XMLInputSource;
import org.eclipse.emf.common.util.TreeIterator;
import org.eclipse.emf.common.util.URI;
import org.eclipse.emf.ecore.EAnnotation;
import org.eclipse.emf.ecore.EClass;
import org.eclipse.emf.ecore.EClassifier;
import org.eclipse.emf.ecore.EEnum;
import org.eclipse.emf.ecore.EEnumLiteral;
import org.eclipse.emf.ecore.EPackage;
import org.eclipse.emf.ecore.EStructuralFeature;
import org.eclipse.emf.ecore.EcoreFactory;
import org.eclipse.emf.ecore.EcorePackage;
import org.eclipse.emf.ecore.resource.Resource;
import org.eclipse.emf.ecore.resource.ResourceSet;
import org.eclipse.emf.ecore.resource.impl.ResourceSetImpl;
import org.eclipse.emf.ecore.xmi.impl.XMIResourceFactoryImpl;

/**
 * Converts a UIMA TypeSystemDescription to an Ecore model.
 * <p>
 * Note: the packages created or loaded by this class are registered in the global
 * {@code EPackage.Registry.INSTANCE}, and type lookups are resolved through that registry.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public class UimaTypeSystem2Ecore {

  /** Source URI used for every EAnnotation that this converter attaches to the Ecore model. */
  private static final String UIMA_ANNOTATION_SOURCE = "http://uima.apache.org";

  /**
   * Converts a UIMA TypeSystem descriptor to an Ecore model.
   * 
   * @param aUimaTypeSystemFilePath
   *          file path to UIMA TypeSystem descriptor
   * @param aOutputResource
   *          An EMF Resource to be populated with the Ecore model
   * @param aOptions
   *          a Map of options for the conversion; may be null. No option keys are currently
   *          defined or read by this class.
   * 
   * @throws InvalidXMLException
   *           if the TypeSystem descriptor, or one of its imports, is not valid or if there are
   *           duplicate, inconsistent definitions of the same type.
   * @throws IOException
   *           if a failure occurs while reading the descriptor file
   */
  public static void uimaTypeSystem2Ecore(String aUimaTypeSystemFilePath,
          Resource aOutputResource, Map aOptions) throws InvalidXMLException, IOException {
    XMLInputSource input = new XMLInputSource(aUimaTypeSystemFilePath);
    TypeSystemDescription tsDesc;
    try {
      tsDesc = UIMAFramework.getXMLParser().parseTypeSystemDescription(input);
    } finally {
      // release the descriptor stream whether or not parsing succeeded
      input.close();
    }
    uimaTypeSystem2Ecore(tsDesc, aOutputResource, aOptions);
  }

  /**
   * Converts a UIMA TypeSystemDescription to an Ecore model.
   * 
   * @param aTypeSystem
   *          UIMA TypeSystemDescription object to convert
   * @param aOutputResource
   *          An EMF Resource to be populated with the Ecore model
   * @param aOptions
   *          a Map of options for the conversion; may be null. No option keys are currently
   *          defined or read by this class.
   * 
   * @throws InvalidXMLException
   *           if the TypeSystem descriptor imports another descriptor that could not be
   *           successfully parsed, or if there are duplicate, inconsistent definitions of the same
   *           type.
   */
  public static void uimaTypeSystem2Ecore(TypeSystemDescription aTypeSystem,
          Resource aOutputResource, Map aOptions) throws InvalidXMLException {
    uimaTypeSystem2Ecore(aTypeSystem, aOutputResource, aOptions, null);
  }

  /**
   * Converts a UIMA TypeSystemDescription to an Ecore model.
   * 
   * @param aTypeSystem
   *          UIMA TypeSystemDescription object to convert. It is cloned before use, so the
   *          caller's object is not modified.
   * @param aOutputResource
   *          An EMF Resource to be populated with the Ecore model
   * @param aOptions
   *          a Map of options for the conversion; may be null. No option keys are currently
   *          defined or read by this class.
   * @param aSchemaLocationMap
   *          optional parameter - if non-null, this map will be populated with (Namespace URI,
   *          Schema Location) pairs, suitable for inclusion in the "schemaLocation" attribute of
   *          XMI instance documents.
   * 
   * @throws InvalidXMLException
   *           if imports cannot be resolved, or if merging the type definitions fails (the
   *           underlying ResourceInitializationException is wrapped).
   */
  public static void uimaTypeSystem2Ecore(TypeSystemDescription aTypeSystem,
          Resource aOutputResource, Map aOptions, Map aSchemaLocationMap)
          throws InvalidXMLException {
    // Add the default definition of uima.tcas.DocumentAnnotation. If the
    // user also defines this type (with additional features), it will be merged
    // with this. First clone the aTypeSystem object so user won't notice
    // we have added a new type definition to their TypeSystemDescription.
    aTypeSystem = (TypeSystemDescription) aTypeSystem.clone();
    TypeDescription docAnnotType = aTypeSystem.addType("uima.tcas.DocumentAnnotation", "",
            "uima.tcas.Annotation");
    docAnnotType.addFeature("language", "", "uima.cas.String");

    // resolve imports
    aTypeSystem.resolveImports();

    // merge, to eliminate duplicate type definitions
    try {
      aTypeSystem = CasCreationUtils.mergeTypeSystems(Arrays
              .asList(new TypeSystemDescription[] { aTypeSystem }));
    } catch (ResourceInitializationException e) {
      throw new InvalidXMLException(e);
    }

    if (aOptions == null) {
      aOptions = Collections.emptyMap();
    }

    // load Ecore model for the UIMA Built-in types
    ResourceSet resSet = aOutputResource.getResourceSet();
    if (resSet == null) {
      resSet = new ResourceSetImpl();
      resSet.getResources().add(aOutputResource);
    }
    loadUimaBuiltinsEcore(resSet, aSchemaLocationMap);

    // Do this in two passes. First pass creates EPackages, EClasses, and EEnums (for string
    // subtypes). Second pass sets supertypes and creates EStructuralFeatures.
    TypeDescription[] types = aTypeSystem.getTypes();
    EPackage firstPackage = null;
    for (TypeDescription type : types) {
      EClassifier eclassifier = uimaType2EClassifier(type, aOptions);
      EPackage owningPackage = eclassifier.getEPackage();

      // EPackages may also have been created. Add the root EPackage to the resource.
      EPackage rootPackage = owningPackage;
      while (rootPackage.getESuperPackage() != null) {
        rootPackage = rootPackage.getESuperPackage();
      }
      aOutputResource.getContents().add(rootPackage);

      if (aSchemaLocationMap != null) {
        String schemaLoc = aOutputResource.getURI() + "#"
                + aOutputResource.getURIFragment(owningPackage);
        aSchemaLocationMap.put(owningPackage.getNsURI(), schemaLoc);
      }
      if (firstPackage == null) {
        firstPackage = owningPackage;
      }
    }

    // Now make second pass to set supertypes and create features
    for (TypeDescription type : types) {
      EClassifier eclassifier = lookupEClassifierForType(type.getName());
      if (!(eclassifier instanceof EClass)) {
        continue; // EEnums (string subtypes) have neither supertypes nor features
      }
      EClass eclass = (EClass) eclassifier;

      // set supertype. This is a lookup only: the result is null if the supertype's package
      // is not registered or does not contain a classifier of that name.
      EClassifier superclass = lookupEClassifierForType(type.getSupertypeName());
      eclass.getESuperTypes().add((EClass) superclass);

      // set features
      FeatureDescription[] features = type.getFeatures();
      if (features != null) { // defensive: treat a missing feature array as "no features"
        for (FeatureDescription feature : features) {
          eclass.getEStructuralFeatures().add(uimaFeature2EStructuralFeature(feature, aOptions));
        }
      }
    }

    // add descriptive type system attributes as EAnnotations on first package
    EAnnotation eannot = newUimaAnnotation();
    if (hasText(aTypeSystem.getName())) {
      eannot.getDetails().put("name", aTypeSystem.getName());
    }
    if (hasText(aTypeSystem.getDescription())) {
      eannot.getDetails().put("description", aTypeSystem.getDescription());
    }
    if (hasText(aTypeSystem.getVersion())) {
      eannot.getDetails().put("version", aTypeSystem.getVersion());
    }
    if (hasText(aTypeSystem.getVendor())) {
      eannot.getDetails().put("vendor", aTypeSystem.getVendor());
    }
    firstPackage.getEAnnotations().add(eannot);
  }

  /**
   * Loads the Ecore model of the UIMA built-in types (the classpath resource "/uima.ecore") into
   * the given resource set and registers every EPackage it contains in the global package
   * registry.
   * 
   * @param resourceSet
   *          resource set into which the built-in model is loaded
   * @param aSchemaLocationMap
   *          if non-null, receives a (Namespace URI, Schema Location) pair for each built-in
   *          package
   * @return the loaded resource
   * @throws UIMARuntimeException
   *           if "/uima.ecore" cannot be found on the classpath
   */
  private static Resource loadUimaBuiltinsEcore(ResourceSet resourceSet, Map aSchemaLocationMap) {
    // load Ecore model for UIMA built-in types (use classloader to locate)
    URL uimaEcoreUrl = UimaTypeSystem2Ecore.class.getResource("/uima.ecore");
    if (uimaEcoreUrl == null) {
      throw new UIMARuntimeException(UIMARuntimeException.UIMA_ECORE_NOT_FOUND, new Object[0]);
    }
    Resource uimaEcoreResource = resourceSet.getResource(
            URI.createURI(uimaEcoreUrl.toString()), true);

    // register core UIMA packages (I'm surprised I need to do this manually)
    TreeIterator iter = uimaEcoreResource.getAllContents();
    while (iter.hasNext()) {
      Object current = iter.next();
      if (!(current instanceof EPackage)) {
        continue;
      }
      EPackage pkg = (EPackage) current;
      EPackage.Registry.INSTANCE.put(pkg.getNsURI(), pkg);
      if (aSchemaLocationMap != null) {
        String schemaLoc = uimaEcoreResource.getURI() + "#"
                + uimaEcoreResource.getURIFragment(pkg);
        aSchemaLocationMap.put(pkg.getNsURI(), schemaLoc);
      }
    }
    return uimaEcoreResource;
  }

  /**
   * Creates the EClassifier for a UIMA type and adds it to the EPackage for the type's namespace
   * (creating that package chain if needed). Direct subtypes of uima.cas.String become EEnums
   * with one literal per allowed value; every other type becomes an EClass. Supertypes and
   * features are NOT set here.
   * 
   * @param aType
   *          the UIMA type description
   * @param aOptions
   *          conversion options (currently not consulted)
   * @return the newly created classifier
   */
  private static EClassifier uimaType2EClassifier(TypeDescription aType, Map aOptions) {
    String fullTypeName = aType.getName();

    // get or create the EPackage for the type's namespace
    EPackage ePackage = uimaNamespace2EPackage(namespaceOf(fullTypeName));

    EClassifier eclassifier;
    if (CAS.TYPE_NAME_STRING.equals(aType.getSupertypeName())) {
      // aType is a "subtype" of uima.cas.String: create an EEnum for it
      EEnum eenum = EcoreFactory.eINSTANCE.createEEnum();
      AllowedValue[] vals = aType.getAllowedValues();
      if (vals != null) { // defensive: treat a missing allowed-values array as empty
        for (int i = 0; i < vals.length; i++) {
          EEnumLiteral literal = EcoreFactory.eINSTANCE.createEEnumLiteral();
          literal.setValue(i);
          literal.setName(vals[i].getString());
          if (hasText(vals[i].getDescription())) {
            EAnnotation eannot = newUimaAnnotation();
            eannot.getDetails().put("description", vals[i].getDescription());
            literal.getEAnnotations().add(eannot);
          }
          eenum.getELiterals().add(literal);
        }
      }
      eclassifier = eenum;
    } else {
      eclassifier = EcoreFactory.eINSTANCE.createEClass();
    }

    eclassifier.setName(shortNameOf(fullTypeName));
    ePackage.getEClassifiers().add(eclassifier);

    // set description as EAnnotation
    if (hasText(aType.getDescription())) {
      EAnnotation eannot = newUimaAnnotation();
      eannot.getDetails().put("description", aType.getDescription());
      eclassifier.getEAnnotations().add(eannot);
    }
    return eclassifier;
  }

  /**
   * Creates the EStructuralFeature for a UIMA feature.
   * <ul>
   * <li>Primitive ranges map to single-valued EAttributes.</li>
   * <li>Primitive arrays/lists map to multi-valued EAttributes, ByteArray maps to a single
   * EByteArray attribute, and FSArray/FSList map to multi-valued EReferences - but only when
   * multiple references are not allowed.</li>
   * <li>Anything else is looked up by name and mapped to an EAttribute if it is an EEnum,
   * otherwise to an EReference.</li>
   * </ul>
   * 
   * @param aFeature
   *          the UIMA feature description
   * @param aOptions
   *          conversion options (currently not consulted)
   * @return the newly created feature (not yet attached to any EClass)
   */
  private static EStructuralFeature uimaFeature2EStructuralFeature(FeatureDescription aFeature,
          Map aOptions) {
    String range = aFeature.getRangeTypeName();
    Boolean multiRefSetting = aFeature.getMultipleReferencesAllowed();
    boolean multiRefAllowed = multiRefSetting != null && multiRefSetting.booleanValue();

    EClassifier primitiveType = primitiveEType(range);
    // arrays and lists are only flattened when they cannot be shared between features
    EClassifier flattenedElementType = multiRefAllowed ? null : multiValuedElementEType(range);

    EStructuralFeature efeat;
    if (primitiveType != null) {
      efeat = newAttribute(primitiveType, false);
    } else if (flattenedElementType != null) {
      efeat = newAttribute(flattenedElementType, true);
    } else if (!multiRefAllowed && CAS.TYPE_NAME_BYTE_ARRAY.equals(range)) {
      // Ecore has a special type EByteArray that we use instead of a
      // multi-valued EByte property. This gives a slightly more efficient
      // serialization.
      efeat = newAttribute(EcorePackage.eINSTANCE.getEByteArray(), false);
    } else if (!multiRefAllowed
            && (CAS.TYPE_NAME_FS_ARRAY.equals(range) || CAS.TYPE_NAME_FS_LIST.equals(range))) {
      // FSArrays and FSLists map to multi-valued references
      efeat = EcoreFactory.eINSTANCE.createEReference();
      String referencedType = aFeature.getElementType();
      if (referencedType == null) {
        referencedType = CAS.TYPE_NAME_TOP;
      }
      efeat.setEType(lookupEClassifierForType(referencedType));
      efeat.setUpperBound(-1); // -1 = unbounded
    } else {
      // non-primitive, non-array, non-list type (or a shared array/list).
      // map to EAttribute if it's an EEnum, otherwise map to EReference
      EClassifier etype = lookupEClassifierForType(range);
      if (etype instanceof EEnum) {
        efeat = EcoreFactory.eINSTANCE.createEAttribute();
      } else {
        efeat = EcoreFactory.eINSTANCE.createEReference();
      }
      efeat.setEType(etype);
    }
    efeat.setName(aFeature.getName());

    // use EAnnotation to record:
    // - the description of the feature
    // - for multi-valued properties, the name of the UIMA type used to
    // implement it (to distinguish between array and list)
    // - for FSList or FSArray that are NOT represented by multi-valued
    // properties, the element type
    boolean hasDescription = hasText(aFeature.getDescription());
    boolean many = efeat.isMany();
    String declaredElementType = aFeature.getElementType();
    if (hasDescription || many || declaredElementType != null) {
      EAnnotation eannot = newUimaAnnotation();
      if (hasDescription) {
        eannot.getDetails().put("description", aFeature.getDescription());
      }
      if (many) {
        eannot.getDetails().put("uimaType", range);
      } else if (declaredElementType != null) {
        eannot.getDetails().put("elementType", declaredElementType);
      }
      efeat.getEAnnotations().add(eannot);
    }
    return efeat;
  }

  /**
   * Maps a UIMA primitive type name to the corresponding Ecore data type.
   * 
   * @return the Ecore data type, or null if the name is not one of the UIMA primitive types
   */
  private static EClassifier primitiveEType(String aRange) {
    EcorePackage ecore = EcorePackage.eINSTANCE;
    if (CAS.TYPE_NAME_STRING.equals(aRange)) {
      return ecore.getEString();
    }
    if (CAS.TYPE_NAME_INTEGER.equals(aRange)) {
      return ecore.getEInt();
    }
    if (CAS.TYPE_NAME_FLOAT.equals(aRange)) {
      return ecore.getEFloat();
    }
    if (CAS.TYPE_NAME_BYTE.equals(aRange)) {
      return ecore.getEByte();
    }
    if (CAS.TYPE_NAME_SHORT.equals(aRange)) {
      return ecore.getEShort();
    }
    if (CAS.TYPE_NAME_LONG.equals(aRange)) {
      return ecore.getELong();
    }
    if (CAS.TYPE_NAME_DOUBLE.equals(aRange)) {
      return ecore.getEDouble();
    }
    if (CAS.TYPE_NAME_BOOLEAN.equals(aRange)) {
      return ecore.getEBoolean();
    }
    return null;
  }

  /**
   * Maps the name of a UIMA primitive array or list type to the Ecore data type of its elements.
   * ByteArray is deliberately excluded because it is mapped to EByteArray by the caller.
   * 
   * @return the element data type, or null if the name is not one of the handled array/list types
   */
  private static EClassifier multiValuedElementEType(String aRange) {
    EcorePackage ecore = EcorePackage.eINSTANCE;
    if (CAS.TYPE_NAME_STRING_ARRAY.equals(aRange) || CAS.TYPE_NAME_STRING_LIST.equals(aRange)) {
      return ecore.getEString();
    }
    if (CAS.TYPE_NAME_INTEGER_ARRAY.equals(aRange) || CAS.TYPE_NAME_INTEGER_LIST.equals(aRange)) {
      return ecore.getEInt();
    }
    if (CAS.TYPE_NAME_FLOAT_ARRAY.equals(aRange) || CAS.TYPE_NAME_FLOAT_LIST.equals(aRange)) {
      return ecore.getEFloat();
    }
    if (CAS.TYPE_NAME_SHORT_ARRAY.equals(aRange)) {
      return ecore.getEShort();
    }
    if (CAS.TYPE_NAME_LONG_ARRAY.equals(aRange)) {
      return ecore.getELong();
    }
    if (CAS.TYPE_NAME_DOUBLE_ARRAY.equals(aRange)) {
      return ecore.getEDouble();
    }
    if (CAS.TYPE_NAME_BOOLEAN_ARRAY.equals(aRange)) {
      return ecore.getEBoolean();
    }
    return null;
  }

  /** Creates an EAttribute of the given type, unbounded (upper bound -1) if aMany is true. */
  private static EStructuralFeature newAttribute(EClassifier aType, boolean aMany) {
    EStructuralFeature attribute = EcoreFactory.eINSTANCE.createEAttribute();
    attribute.setEType(aType);
    if (aMany) {
      attribute.setUpperBound(-1);
    }
    return attribute;
  }

  /** Creates an empty EAnnotation whose source is the UIMA annotation URI. */
  private static EAnnotation newUimaAnnotation() {
    EAnnotation eannot = EcoreFactory.eINSTANCE.createEAnnotation();
    eannot.setSource(UIMA_ANNOTATION_SOURCE);
    return eannot;
  }

  /** Returns true if the string is neither null nor empty. */
  private static boolean hasText(String aValue) {
    return aValue != null && aValue.length() > 0;
  }

  /**
   * Returns the namespace part of a fully-qualified UIMA type name (everything before the last
   * dot), or null if the name has no dot beyond position 0.
   */
  private static String namespaceOf(String aFullTypeName) {
    int lastDot = aFullTypeName.lastIndexOf('.');
    return lastDot <= 0 ? null : aFullTypeName.substring(0, lastDot);
  }

  /**
   * Returns the short name of a fully-qualified UIMA type name (everything after the last dot),
   * or the whole name if it has no dot beyond position 0.
   */
  private static String shortNameOf(String aFullTypeName) {
    int lastDot = aFullTypeName.lastIndexOf('.');
    return lastDot <= 0 ? aFullTypeName : aFullTypeName.substring(lastDot + 1);
  }

  /**
   * Looks up the EClassifier for a fully-qualified UIMA type name through the global package
   * registry. Nothing is created by this method.
   * 
   * @param aFullTypeName
   *          fully-qualified UIMA type name
   * @return the classifier, or null if no EPackage is registered for the type's namespace or the
   *         package has no classifier with that short name
   */
  private static EClassifier lookupEClassifierForType(String aFullTypeName) {
    String nsUri = uimaNamespace2NamespaceUri(namespaceOf(aFullTypeName));

    // does EPackage already exist for this URI?
    EPackage ePackage = EPackage.Registry.INSTANCE.getEPackage(nsUri);
    if (ePackage == null) {
      return null;
    }
    return ePackage.getEClassifier(shortNameOf(aFullTypeName));
  }

  /**
   * Gets or creates an EPackage for a UIMA namespace. Actually will create a whole chain of nested
   * EPackages, one for each component of the UIMA namespace, but only the leaf node of the chain
   * will be returned. Newly created packages are registered in the global package registry.
   * 
   * @param uimaNamespace
   *          UIMA namespace; null selects the "noNamespace" package
   * @return EPackage corresponding to this namespace.
   */
  private static EPackage uimaNamespace2EPackage(String uimaNamespace) {
    // convert UIMA namespace (dotted string) to namespace URI
    String nsUri = uimaNamespace2NamespaceUri(uimaNamespace);

    // see if package already exists for this URI
    EPackage ePackage = EPackage.Registry.INSTANCE.getEPackage(nsUri);
    if (ePackage != null) {
      return ePackage;
    }

    // package name is last component of namespace.
    // all other components form the parent namespace
    String parentNamespace = null;
    String packageName;
    if (uimaNamespace != null) {
      int lastDot = uimaNamespace.lastIndexOf('.');
      packageName = uimaNamespace.substring(lastDot + 1);
      if (lastDot > 0) {
        parentNamespace = uimaNamespace.substring(0, lastDot);
      }
    } else {
      packageName = "noNamespace";
    }

    // create and register the package before recursing to the parent
    ePackage = EcoreFactory.eINSTANCE.createEPackage();
    ePackage.setNsURI(nsUri);
    ePackage.setName(packageName);
    EPackage.Registry.INSTANCE.put(nsUri, ePackage);

    // get or create SuperPackage if any
    if (parentNamespace != null) {
      EPackage superPackage = uimaNamespace2EPackage(parentNamespace);
      superPackage.getESubpackages().add(ePackage);
    }
    return ePackage;
  }

  /**
   * Converts a UIMA namespace to a namespace URI.
   * 
   * @param uimaNamespace
   *          dotted UIMA namespace, may be null or empty
   * @return {@code XmiCasSerializer.DEFAULT_NAMESPACE_URI} for a null/empty namespace, otherwise
   *         "http:///" + namespace with dots replaced by slashes + ".ecore"
   */
  private static String uimaNamespace2NamespaceUri(String uimaNamespace) {
    if (uimaNamespace == null || uimaNamespace.length() == 0) {
      return XmiCasSerializer.DEFAULT_NAMESPACE_URI;
    }
    // Our convention is that the Namespace URI is "http:///", followed by the UIMA namespace, with
    // dots converted to slashes, and with ".ecore" appended. (This is EMF's convention for
    // constructing a namespace URI from a Java package name.)
    return "http:///" + uimaNamespace.replace('.', '/') + ".ecore";
  }

  /**
   * Main program. Takes two arguments: the filename of an input TypeSystem descriptor file and the
   * filename of the Ecore/XMI file to generate.
   * 
   * @param args
   *          args[0] = input TypeSystem descriptor file, args[1] = output Ecore/XMI file
   * @throws IllegalArgumentException
   *           if fewer than two arguments are supplied
   * @throws Exception
   *           if the conversion or saving the output fails
   */
  public static void main(String[] args) throws Exception {
    if (args == null || args.length < 2) {
      throw new IllegalArgumentException("Usage: java " + UimaTypeSystem2Ecore.class.getName()
              + " <input TypeSystem descriptor file> <output Ecore/XMI file>");
    }

    // register default resource factory
    Resource.Factory.Registry.INSTANCE.getExtensionToFactoryMap().put("*",
            new XMIResourceFactoryImpl());

    ResourceSet resourceSet = new ResourceSetImpl();
    URI outputURI = URI.createFileURI(args[1]);
    Resource outputResource = resourceSet.createResource(outputURI);
    Map options = new HashMap();
    uimaTypeSystem2Ecore(args[0], outputResource, options);
    outputResource.save(null);
  }
}